R Packages

# Packages used throughout the analysis, attached in one call.
packages <- c("tidyverse", "kableExtra", "broom", "broomExtra", "gganimate")
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
xfun::pkg_attach(packages, message = FALSE)

Functions

# Format a table with kable(), apply the bootstrap styling options below, and
# wrap the result in a full-width scroll box that is 200px tall.
#
# table: the object passed to kable().
# Returns the value of scroll_box().
nice_tables <- function(table) {
  styling <- c("striped", "hover", "condensed", "responsive")
  styled <- kable_styling(kable(table), bootstrap_options = styling)
  scroll_box(styled, width = "100%", height = "200px")
}

Let’s first look at the tournament dataset.

Tournament

# Read the detailed tournament results file, then inspect its columns.
df_tournament <- read_csv(file = "data/NCAATourneyDetailedResults.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   WLoc = col_character()
## )
## See spec(...) for full column specifications.
df_tournament %>% glimpse()
## Observations: 1,048
## Variables: 34
## $ Season  <dbl> 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 20…
## $ DayNum  <dbl> 134, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 1…
## $ WTeamID <dbl> 1421, 1112, 1113, 1141, 1143, 1163, 1181, 1211, 1228, 12…
## $ WScore  <dbl> 92, 80, 84, 79, 76, 58, 67, 74, 65, 64, 72, 72, 70, 71, …
## $ LTeamID <dbl> 1411, 1436, 1272, 1166, 1301, 1140, 1161, 1153, 1443, 14…
## $ LScore  <dbl> 84, 51, 71, 73, 74, 53, 57, 69, 60, 61, 68, 71, 69, 54, …
## $ WLoc    <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "…
## $ NumOT   <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
## $ WFGM    <dbl> 32, 31, 31, 29, 27, 17, 19, 20, 24, 28, 22, 28, 23, 24, …
## $ WFGA    <dbl> 69, 66, 59, 53, 64, 52, 54, 47, 56, 51, 51, 52, 54, 52, …
## $ WFGM3   <dbl> 11, 7, 6, 3, 7, 4, 4, 6, 5, 2, 9, 5, 3, 10, 8, 8, 6, 7, …
## $ WFGA3   <dbl> 29, 23, 14, 7, 20, 14, 13, 14, 14, 6, 16, 13, 13, 18, 24…
## $ WFTM    <dbl> 17, 11, 16, 18, 15, 20, 25, 28, 12, 6, 19, 11, 21, 13, 1…
## $ WFTA    <dbl> 26, 14, 22, 25, 23, 27, 31, 37, 14, 11, 23, 18, 25, 24, …
## $ WOR     <dbl> 14, 11, 10, 11, 18, 12, 13, 8, 15, 7, 11, 9, 11, 11, 12,…
## $ WDR     <dbl> 30, 36, 27, 20, 20, 29, 27, 28, 23, 20, 20, 32, 33, 24, …
## $ WAst    <dbl> 17, 22, 18, 15, 17, 8, 4, 12, 15, 13, 14, 7, 7, 16, 16, …
## $ WTO     <dbl> 12, 16, 9, 18, 13, 14, 16, 12, 14, 11, 10, 23, 20, 14, 1…
## $ WStl    <dbl> 5, 10, 7, 13, 8, 3, 10, 2, 11, 8, 4, 4, 6, 8, 4, 6, 7, 4…
## $ WBlk    <dbl> 3, 7, 4, 1, 2, 8, 8, 2, 4, 4, 3, 6, 6, 3, 8, 1, 2, 5, 0,…
## $ WPF     <dbl> 22, 8, 19, 19, 14, 16, 23, 15, 14, 17, 23, 19, 19, 12, 2…
## $ LFGM    <dbl> 29, 20, 25, 27, 25, 20, 18, 26, 22, 23, 24, 26, 23, 21, …
## $ LFGA    <dbl> 67, 64, 69, 60, 56, 64, 54, 66, 58, 56, 54, 65, 66, 52, …
## $ LFGM3   <dbl> 12, 4, 7, 7, 9, 2, 3, 10, 8, 6, 5, 8, 8, 6, 4, 9, 7, 6, …
## $ LFGA3   <dbl> 31, 16, 28, 17, 21, 17, 11, 27, 24, 17, 15, 19, 23, 20, …
## $ LFTM    <dbl> 14, 7, 14, 12, 15, 11, 18, 7, 8, 9, 15, 11, 15, 6, 25, 6…
## $ LFTA    <dbl> 31, 7, 21, 17, 20, 13, 22, 10, 13, 10, 25, 21, 20, 8, 32…
## $ LOR     <dbl> 17, 8, 20, 14, 10, 15, 11, 13, 17, 13, 14, 11, 14, 7, 17…
## $ LDR     <dbl> 28, 26, 22, 17, 26, 26, 24, 22, 18, 19, 20, 18, 23, 22, …
## $ LAst    <dbl> 16, 12, 11, 20, 16, 11, 8, 13, 10, 13, 14, 19, 15, 8, 7,…
## $ LTO     <dbl> 15, 17, 12, 21, 14, 11, 19, 10, 14, 13, 9, 13, 12, 18, 1…
## $ LStl    <dbl> 5, 10, 2, 6, 5, 8, 5, 7, 6, 6, 4, 6, 11, 7, 3, 3, 4, 8, …
## $ LBlk    <dbl> 0, 3, 5, 6, 8, 4, 4, 6, 5, 1, 1, 0, 3, 2, 0, 3, 0, 4, 4,…
## $ LPF     <dbl> 22, 15, 18, 21, 19, 22, 19, 24, 16, 15, 20, 19, 25, 23, …

We will also need to load the Teams.csv dataset to append actual team names rather than IDs.

# Read the team lookup table (ID, name, first/last D1 season), then inspect it.
df_teams <- read_csv(file = "data/Teams.csv")
## Parsed with column specification:
## cols(
##   TeamID = col_double(),
##   TeamName = col_character(),
##   FirstD1Season = col_double(),
##   LastD1Season = col_double()
## )
df_teams %>% glimpse()
## Observations: 366
## Variables: 4
## $ TeamID        <dbl> 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 11…
## $ TeamName      <chr> "Abilene Chr", "Air Force", "Akron", "Alabama", "A…
## $ FirstD1Season <dbl> 2014, 1985, 1985, 1985, 2000, 1985, 2000, 1985, 19…
## $ LastD1Season  <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 19…
# Reduce the team table to the ID -> name lookup columns.
df_teams <- select(df_teams, TeamID, TeamName)

# Two copies of the lookup, keyed for the winning and the losing side of each
# tournament game respectively.
df_wteams <- df_teams %>%
  rename(WTeamID = TeamID, WTeamName = TeamName)
df_lteams <- df_teams %>%
  rename(LTeamID = TeamID, LTeamName = TeamName)

# Attach both team names via inner joins on the IDs, then drop every column
# whose name contains "TeamID" now that the names are available.
df_tournament <- df_tournament %>%
  inner_join(df_wteams, by = "WTeamID") %>%
  inner_join(df_lteams, by = "LTeamID") %>%
  select(-contains("TeamID"))

df_tournament %>% glimpse()
## Observations: 1,048
## Variables: 34
## $ Season    <dbl> 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, …
## $ DayNum    <dbl> 134, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,…
## $ WScore    <dbl> 92, 80, 84, 79, 76, 58, 67, 74, 65, 64, 72, 72, 70, 71…
## $ LScore    <dbl> 84, 51, 71, 73, 74, 53, 57, 69, 60, 61, 68, 71, 69, 54…
## $ WLoc      <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",…
## $ NumOT     <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ WFGM      <dbl> 32, 31, 31, 29, 27, 17, 19, 20, 24, 28, 22, 28, 23, 24…
## $ WFGA      <dbl> 69, 66, 59, 53, 64, 52, 54, 47, 56, 51, 51, 52, 54, 52…
## $ WFGM3     <dbl> 11, 7, 6, 3, 7, 4, 4, 6, 5, 2, 9, 5, 3, 10, 8, 8, 6, 7…
## $ WFGA3     <dbl> 29, 23, 14, 7, 20, 14, 13, 14, 14, 6, 16, 13, 13, 18, …
## $ WFTM      <dbl> 17, 11, 16, 18, 15, 20, 25, 28, 12, 6, 19, 11, 21, 13,…
## $ WFTA      <dbl> 26, 14, 22, 25, 23, 27, 31, 37, 14, 11, 23, 18, 25, 24…
## $ WOR       <dbl> 14, 11, 10, 11, 18, 12, 13, 8, 15, 7, 11, 9, 11, 11, 1…
## $ WDR       <dbl> 30, 36, 27, 20, 20, 29, 27, 28, 23, 20, 20, 32, 33, 24…
## $ WAst      <dbl> 17, 22, 18, 15, 17, 8, 4, 12, 15, 13, 14, 7, 7, 16, 16…
## $ WTO       <dbl> 12, 16, 9, 18, 13, 14, 16, 12, 14, 11, 10, 23, 20, 14,…
## $ WStl      <dbl> 5, 10, 7, 13, 8, 3, 10, 2, 11, 8, 4, 4, 6, 8, 4, 6, 7,…
## $ WBlk      <dbl> 3, 7, 4, 1, 2, 8, 8, 2, 4, 4, 3, 6, 6, 3, 8, 1, 2, 5, …
## $ WPF       <dbl> 22, 8, 19, 19, 14, 16, 23, 15, 14, 17, 23, 19, 19, 12,…
## $ LFGM      <dbl> 29, 20, 25, 27, 25, 20, 18, 26, 22, 23, 24, 26, 23, 21…
## $ LFGA      <dbl> 67, 64, 69, 60, 56, 64, 54, 66, 58, 56, 54, 65, 66, 52…
## $ LFGM3     <dbl> 12, 4, 7, 7, 9, 2, 3, 10, 8, 6, 5, 8, 8, 6, 4, 9, 7, 6…
## $ LFGA3     <dbl> 31, 16, 28, 17, 21, 17, 11, 27, 24, 17, 15, 19, 23, 20…
## $ LFTM      <dbl> 14, 7, 14, 12, 15, 11, 18, 7, 8, 9, 15, 11, 15, 6, 25,…
## $ LFTA      <dbl> 31, 7, 21, 17, 20, 13, 22, 10, 13, 10, 25, 21, 20, 8, …
## $ LOR       <dbl> 17, 8, 20, 14, 10, 15, 11, 13, 17, 13, 14, 11, 14, 7, …
## $ LDR       <dbl> 28, 26, 22, 17, 26, 26, 24, 22, 18, 19, 20, 18, 23, 22…
## $ LAst      <dbl> 16, 12, 11, 20, 16, 11, 8, 13, 10, 13, 14, 19, 15, 8, …
## $ LTO       <dbl> 15, 17, 12, 21, 14, 11, 19, 10, 14, 13, 9, 13, 12, 18,…
## $ LStl      <dbl> 5, 10, 2, 6, 5, 8, 5, 7, 6, 6, 4, 6, 11, 7, 3, 3, 4, 8…
## $ LBlk      <dbl> 0, 3, 5, 6, 8, 4, 4, 6, 5, 1, 1, 0, 3, 2, 0, 3, 0, 4, …
## $ LPF       <dbl> 22, 15, 18, 21, 19, 22, 19, 24, 16, 15, 20, 19, 25, 23…
## $ WTeamName <chr> "UNC Asheville", "Arizona", "Arizona St", "C Michigan"…
## $ LTeamName <chr> "TX Southern", "Vermont", "Memphis", "Creighton", "NC …

Let’s break apart the tables into wins and losses so that we can manipulate the same variables at the same time.

# Split the game-level table into a winners' table and a losers' table that
# share the same (prefix-free) column names.
#
# Columns are picked by their leading "W" / "L" with case-sensitive matching.
# A case-insensitive substring match on "L" would also select WLoc, WStl and
# WBlk for the loss table. The prefix is stripped with an anchored sub() so
# only the leading letter is removed and any later "W"/"L" in a name is kept.
df_tournament_win <- df_tournament %>%
  select(Season, DayNum, starts_with("W", ignore.case = FALSE))
colnames(df_tournament_win) <- sub("^W", "", colnames(df_tournament_win))

df_tournament_loss <- df_tournament %>%
  select(Season, DayNum, starts_with("L", ignore.case = FALSE))
colnames(df_tournament_loss) <- sub("^L", "", colnames(df_tournament_loss))

# data manipulation ----
# create new variables that are also easy to read
# Rename the raw box-score columns to readable snake_case names and derive
# two-point, missed-shot and shooting-percentage columns.
#
# data: a data frame with columns Season, DayNum, TeamName, Score, FGM, FGA,
#       FGM3, FGA3, FTM, FTA, OR, DR, Ast, TO, Stl, Blk and PF. Any other
#       columns are dropped by the select().
# Returns a data frame with the renamed columns, a 1..n row `index`, and the
# derived columns. A percentage is NaN where both made and attempted are 0.
data_manipulation_tournament <- function(data) {
  data %>%
    select(
      season = Season,
      n_day = DayNum,
      team_name = TeamName,
      score = Score,
      points_made = FGM,
      points_attempted = FGA,
      points_3_made = FGM3,
      points_3_attempted = FGA3,
      points_free_throws_made = FTM,
      points_free_throws_attempted = FTA,
      rebound_offensive = OR,
      rebound_defensive = DR,
      assist = Ast,
      turnovers = TO,
      steals = Stl,
      blocks = Blk,
      personal_fouls = PF
    ) %>%
    mutate(
      # seq_len() yields an empty index for a zero-row input, whereas
      # 1:nrow(data) would yield c(1, 0) and make mutate() fail.
      index = seq_len(nrow(data)),
      # Two-point figures are the overall figures minus the three-point ones.
      points_2_made = points_made - points_3_made,
      points_2_attempted = points_attempted - points_3_attempted,
      points_missed = points_attempted - points_made,
      points_2_missed = points_2_attempted - points_2_made,
      points_3_missed = points_3_attempted - points_3_made,
      points_free_throws_missed =
        points_free_throws_attempted - points_free_throws_made,
      points_made_percentage = points_made / points_attempted * 100,
      points_2_made_percentage = points_2_made / points_2_attempted * 100,
      points_3_made_percentage = points_3_made / points_3_attempted * 100,
      points_free_throws_made_percentage =
        points_free_throws_made / points_free_throws_attempted * 100
    )
}

# Build the readable stat columns for the winning side and label every row.
df_tournament_win <- df_tournament_win %>%
  data_manipulation_tournament() %>%
  mutate(outcome = "win")
df_tournament_win %>% glimpse()
## Observations: 1,048
## Variables: 29
## $ season                             <dbl> 2003, 2003, 2003, 2003, 2003,…
## $ n_day                              <dbl> 134, 136, 136, 136, 136, 136,…
## $ team_name                          <chr> "UNC Asheville", "Arizona", "…
## $ score                              <dbl> 92, 80, 84, 79, 76, 58, 67, 7…
## $ points_made                        <dbl> 32, 31, 31, 29, 27, 17, 19, 2…
## $ points_attempted                   <dbl> 69, 66, 59, 53, 64, 52, 54, 4…
## $ points_3_made                      <dbl> 11, 7, 6, 3, 7, 4, 4, 6, 5, 2…
## $ points_3_attempted                 <dbl> 29, 23, 14, 7, 20, 14, 13, 14…
## $ points_free_throws_made            <dbl> 17, 11, 16, 18, 15, 20, 25, 2…
## $ points_free_throws_attempted       <dbl> 26, 14, 22, 25, 23, 27, 31, 3…
## $ rebound_offensive                  <dbl> 14, 11, 10, 11, 18, 12, 13, 8…
## $ rebound_defensive                  <dbl> 30, 36, 27, 20, 20, 29, 27, 2…
## $ assist                             <dbl> 17, 22, 18, 15, 17, 8, 4, 12,…
## $ turnovers                          <dbl> 12, 16, 9, 18, 13, 14, 16, 12…
## $ steals                             <dbl> 5, 10, 7, 13, 8, 3, 10, 2, 11…
## $ blocks                             <dbl> 3, 7, 4, 1, 2, 8, 8, 2, 4, 4,…
## $ personal_fouls                     <dbl> 22, 8, 19, 19, 14, 16, 23, 15…
## $ index                              <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10…
## $ points_2_made                      <dbl> 21, 24, 25, 26, 20, 13, 15, 1…
## $ points_2_attempted                 <dbl> 40, 43, 45, 46, 44, 38, 41, 3…
## $ points_missed                      <dbl> 37, 35, 28, 24, 37, 35, 35, 2…
## $ points_2_missed                    <dbl> 19, 19, 20, 20, 24, 25, 26, 1…
## $ points_3_missed                    <dbl> 18, 16, 8, 4, 13, 10, 9, 8, 9…
## $ points_free_throws_missed          <dbl> 9, 3, 6, 7, 8, 7, 6, 9, 2, 5,…
## $ points_made_percentage             <dbl> 46.37681, 46.96970, 52.54237,…
## $ points_2_made_percentage           <dbl> 52.50000, 55.81395, 55.55556,…
## $ points_3_made_percentage           <dbl> 37.93103, 30.43478, 42.85714,…
## $ points_free_throws_made_percentage <dbl> 65.38462, 78.57143, 72.72727,…
## $ outcome                            <chr> "win", "win", "win", "win", "…
# Build the readable stat columns for the losing side and label every row.
df_tournament_loss <- df_tournament_loss %>%
  data_manipulation_tournament() %>%
  mutate(outcome = "loss")
df_tournament_loss %>% glimpse()
## Observations: 1,048
## Variables: 29
## $ season                             <dbl> 2003, 2003, 2003, 2003, 2003,…
## $ n_day                              <dbl> 134, 136, 136, 136, 136, 136,…
## $ team_name                          <chr> "TX Southern", "Vermont", "Me…
## $ score                              <dbl> 84, 51, 71, 73, 74, 53, 57, 6…
## $ points_made                        <dbl> 29, 20, 25, 27, 25, 20, 18, 2…
## $ points_attempted                   <dbl> 67, 64, 69, 60, 56, 64, 54, 6…
## $ points_3_made                      <dbl> 12, 4, 7, 7, 9, 2, 3, 10, 8, …
## $ points_3_attempted                 <dbl> 31, 16, 28, 17, 21, 17, 11, 2…
## $ points_free_throws_made            <dbl> 14, 7, 14, 12, 15, 11, 18, 7,…
## $ points_free_throws_attempted       <dbl> 31, 7, 21, 17, 20, 13, 22, 10…
## $ rebound_offensive                  <dbl> 17, 8, 20, 14, 10, 15, 11, 13…
## $ rebound_defensive                  <dbl> 28, 26, 22, 17, 26, 26, 24, 2…
## $ assist                             <dbl> 16, 12, 11, 20, 16, 11, 8, 13…
## $ turnovers                          <dbl> 15, 17, 12, 21, 14, 11, 19, 1…
## $ steals                             <dbl> 5, 10, 2, 6, 5, 8, 5, 7, 6, 6…
## $ blocks                             <dbl> 0, 3, 5, 6, 8, 4, 4, 6, 5, 1,…
## $ personal_fouls                     <dbl> 22, 15, 18, 21, 19, 22, 19, 2…
## $ index                              <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10…
## $ points_2_made                      <dbl> 17, 16, 18, 20, 16, 18, 15, 1…
## $ points_2_attempted                 <dbl> 36, 48, 41, 43, 35, 47, 43, 3…
## $ points_missed                      <dbl> 38, 44, 44, 33, 31, 44, 36, 4…
## $ points_2_missed                    <dbl> 19, 32, 23, 23, 19, 29, 28, 2…
## $ points_3_missed                    <dbl> 19, 12, 21, 10, 12, 15, 8, 17…
## $ points_free_throws_missed          <dbl> 17, 0, 7, 5, 5, 2, 4, 3, 5, 1…
## $ points_made_percentage             <dbl> 43.28358, 31.25000, 36.23188,…
## $ points_2_made_percentage           <dbl> 47.22222, 33.33333, 43.90244,…
## $ points_3_made_percentage           <dbl> 38.70968, 25.00000, 25.00000,…
## $ points_free_throws_made_percentage <dbl> 45.16129, 100.00000, 66.66667…
## $ outcome                            <chr> "loss", "loss", "loss", "loss…
# Stack winners on top of losers (two rows per game) and turn the outcome
# label into a factor.
df_tournament_all <- df_tournament_win %>%
  bind_rows(df_tournament_loss) %>%
  mutate(outcome = as_factor(outcome))
df_tournament_all %>% glimpse()
## Observations: 2,096
## Variables: 29
## $ season                             <dbl> 2003, 2003, 2003, 2003, 2003,…
## $ n_day                              <dbl> 134, 136, 136, 136, 136, 136,…
## $ team_name                          <chr> "UNC Asheville", "Arizona", "…
## $ score                              <dbl> 92, 80, 84, 79, 76, 58, 67, 7…
## $ points_made                        <dbl> 32, 31, 31, 29, 27, 17, 19, 2…
## $ points_attempted                   <dbl> 69, 66, 59, 53, 64, 52, 54, 4…
## $ points_3_made                      <dbl> 11, 7, 6, 3, 7, 4, 4, 6, 5, 2…
## $ points_3_attempted                 <dbl> 29, 23, 14, 7, 20, 14, 13, 14…
## $ points_free_throws_made            <dbl> 17, 11, 16, 18, 15, 20, 25, 2…
## $ points_free_throws_attempted       <dbl> 26, 14, 22, 25, 23, 27, 31, 3…
## $ rebound_offensive                  <dbl> 14, 11, 10, 11, 18, 12, 13, 8…
## $ rebound_defensive                  <dbl> 30, 36, 27, 20, 20, 29, 27, 2…
## $ assist                             <dbl> 17, 22, 18, 15, 17, 8, 4, 12,…
## $ turnovers                          <dbl> 12, 16, 9, 18, 13, 14, 16, 12…
## $ steals                             <dbl> 5, 10, 7, 13, 8, 3, 10, 2, 11…
## $ blocks                             <dbl> 3, 7, 4, 1, 2, 8, 8, 2, 4, 4,…
## $ personal_fouls                     <dbl> 22, 8, 19, 19, 14, 16, 23, 15…
## $ index                              <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10…
## $ points_2_made                      <dbl> 21, 24, 25, 26, 20, 13, 15, 1…
## $ points_2_attempted                 <dbl> 40, 43, 45, 46, 44, 38, 41, 3…
## $ points_missed                      <dbl> 37, 35, 28, 24, 37, 35, 35, 2…
## $ points_2_missed                    <dbl> 19, 19, 20, 20, 24, 25, 26, 1…
## $ points_3_missed                    <dbl> 18, 16, 8, 4, 13, 10, 9, 8, 9…
## $ points_free_throws_missed          <dbl> 9, 3, 6, 7, 8, 7, 6, 9, 2, 5,…
## $ points_made_percentage             <dbl> 46.37681, 46.96970, 52.54237,…
## $ points_2_made_percentage           <dbl> 52.50000, 55.81395, 55.55556,…
## $ points_3_made_percentage           <dbl> 37.93103, 30.43478, 42.85714,…
## $ points_free_throws_made_percentage <dbl> 65.38462, 78.57143, 72.72727,…
## $ outcome                            <fct> win, win, win, win, win, win,…

Now let’s compare some winning and losing stats.

Across All Seasons

Most Tournament Game Wins

# Bar chart of the teams with the most tournament wins across all seasons.
df_tournament_all %>%
  count(team_name, outcome) %>%
  filter(outcome == "win") %>%
  # Rank on the win count explicitly instead of letting top_n() fall back to
  # whichever column happens to be last. Ties at the cut-off are all kept.
  top_n(10, wt = n) %>%
  arrange(desc(n)) %>%
  # Freeze the factor levels in the sorted order so the x axis follows the
  # win count instead of the alphabet.
  mutate(team_name = factor(team_name, levels = team_name)) %>%
  ggplot(aes(x = team_name, y = n, fill = n)) +
  geom_col() +
  scale_fill_distiller(palette = "Blues", direction = -1) +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  labs(
    title = "Most Wins",
    x = "\nTeam",
    y = "Frequency\n",
    fill = "Frequency\n"
  )

Most Championship Wins

# Count, per team, the seasons in which it recorded exactly six tournament
# wins; this analysis treats such a season as a championship.
# NOTE(review): presumably relies on only the champion reaching six wins in a
# season -- confirm against the tournament format.
df_most_wins <- df_tournament_all %>%
  filter(outcome == "win") %>%
  count(season, team_name) %>%
  filter(n == 6) %>%
  # Drop the per-season count before counting again so the second count()
  # tallies rows (seasons) per team.
  select(-n) %>%
  count(team_name) %>%
  arrange(desc(n)) %>%
  # Keep the sorted order on the x axis.
  mutate(team_name = factor(team_name, levels = team_name))

df_most_wins %>%
  ggplot(aes(team_name, n, fill = n)) +
  geom_col() +
  labs(
    title = "Most Championship Wins",
    x = "\nTeam",
    y = "Frequency\n",
    fill = "Frequency\n"
  ) +
  scale_fill_distiller(palette = "Blues", direction = -1) +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Metrics

# Names of the stat columns to plot: everything except the identifier
# columns, the row index and the outcome label.
tournament_metrics <- df_tournament_all %>%
  select(-c(season, n_day, team_name, index, outcome)) %>%
  colnames()

# One density plot per metric, with wins and losses overlaid.
for (metric in tournament_metrics) {
  figure <- df_tournament_all %>%
    # .data[[metric]] selects the column by its string name; this replaces
    # eval(as.name(metric)) with the pronoun ggplot2 provides for that purpose.
    ggplot(aes(x = .data[[metric]], fill = outcome)) +
    geom_density(alpha = 0.5, color = 0) +
    theme_minimal() +
    labs(
      x = paste0("\n", metric),
      y = "Density\n"
    )
  # Plots are not auto-printed inside a for loop, so print explicitly.
  print(figure)
}

It looks like the winning team takes slightly fewer shots, misses fewer of them, and has more offensive rebounds, more assists, fewer turnovers, more steals, more blocks, and fewer personal fouls.

By Season

Most Overall Wins

# Per season, the teams with the most tournament wins. `order` is a running
# row number over the whole table, so each facet gets its own block of x
# positions, sorted by wins within the season.
df_most_wins <- df_tournament_all %>%
  count(season, team_name, outcome) %>%
  filter(outcome == "win") %>%
  group_by(season) %>%
  # Rank on the win count explicitly instead of letting top_n() fall back to
  # whichever column happens to be last. Ties at the cut-off are all kept.
  top_n(10, wt = n) %>%
  ungroup() %>%
  arrange(season, desc(n)) %>%
  mutate(order = row_number())

ggplot(df_most_wins, aes(x = order, y = n, fill = n)) +
  geom_col() +
  scale_fill_distiller(palette = "Blues", direction = -1) +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  # The data are tournament games, so the title must not say "Regular Season".
  labs(
    title = "Most Wins During Tournament",
    x = "\nTeam",
    y = "Frequency\n",
    fill = "Frequency\n"
  ) +
  facet_wrap(~ season, scales = "free") +
  # Map each numeric x position back to the team name it stands for.
  scale_x_continuous(
    breaks = df_most_wins$order,
    labels = df_most_wins$team_name,
    expand = c(0, 0)
  )

Metrics

# One density plot per metric, wins vs. losses, with one panel per season.
for (metric in tournament_metrics) {
  figure <- df_tournament_all %>%
    # .data[[metric]] selects the column by its string name; this replaces
    # eval(as.name(metric)) with the pronoun ggplot2 provides for that purpose.
    ggplot(aes(x = .data[[metric]], fill = outcome)) +
    geom_density(alpha = 0.5, color = 0) +
    theme_minimal() +
    labs(
      x = paste0("\n", metric),
      y = "Density\n"
    ) +
    facet_wrap(~ season)
  # Plots are not auto-printed inside a for loop, so print explicitly.
  print(figure)
}

These metrics look pretty consistent across the seasons.